In [1]:
import numpy as np
import matplotlib.pyplot as pl
import seaborn as sn
import emcee
%matplotlib inline
Let us generate a set of points following a straight line and add some noise
In [44]:
nPoints = 6
a = 1.2
b = 0.5
sigma = 0.2
x = 3.0 * np.random.rand(nPoints)
x = np.sort(x)
y = a*x + b
yNoise = y + sigma*np.random.randn(nPoints)
In [45]:
f, ax = pl.subplots()
ax.plot(x, y)
ax.errorbar(x, yNoise, yerr=sigma, fmt='.')
Out[45]:
The best fit is obtained by computing the likelihood and maximizing it (equivalent to the least-squares solution)
In [46]:
coef = np.polyfit(x, yNoise, 1)
print coef
In [47]:
f, ax = pl.subplots()
ax.errorbar(x, yNoise, yerr=sigma, fmt='.')
ax.plot(x, coef[0]*x + coef[1])
Out[47]:
But we want to also compute the uncertainty and possible correlation between the parameters. For this reason, we sample the likelihood using a Markov Chain Monte Carlo algorithm.
In [48]:
class linearFit(object):
def __init__(self, x, y, noise):
self.x = x
self.y = y
self.noise = noise
self.upper = 5.0
self.lower = -5.0
def logPosterior(self, theta):
if ( (np.any(theta < self.lower)) | (np.any(theta > self.upper)) ):
return -np.inf
else:
model = theta[0] * self.x + theta[1]
return -np.sum((self.y-model)**2 / (2.0*self.noise**2))
def sample(self):
ndim, nwalkers = 2, 500
self.theta0 = np.asarray([1.0,1.0])
p0 = [self.theta0 + 0.01*np.random.randn(ndim) for i in range(nwalkers)]
self.sampler = emcee.EnsembleSampler(nwalkers, ndim, self.logPosterior)
self.sampler.run_mcmc(p0, 100)
In [49]:
out = linearFit(x, yNoise, sigma)
out.sample()
aSamples = out.sampler.chain[:,-10:,0].flatten()
bSamples = out.sampler.chain[:,-10:,1].flatten()
In [50]:
f, ax = pl.subplots(ncols=2, nrows=2, figsize=(12,8))
ax[0,0].plot(aSamples)
ax[0,1].hist(aSamples)
ax[0,0].set_ylabel('a')
ax[0,1].set_xlabel('a')
ax[1,0].plot(bSamples)
ax[1,1].hist(bSamples)
ax[1,0].set_ylabel('b')
ax[1,1].set_xlabel('b')
pl.tight_layout()
In [51]:
pl.hexbin(aSamples,bSamples)
Out[51]:
In [52]:
print "mu_a={0} - sigma_a={1}".format(np.mean(aSamples),np.std(aSamples))
print "mu_b={0} - sigma_b={1}".format(np.mean(bSamples),np.std(bSamples))
In [ ]: